package com.shujia.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo of the RDD `sample` transformation.
  *
  * Reads the lines of `data/students.txt`, draws a sample of roughly 10% of
  * them without replacement, and prints every sampled line.
  */
object Demo6Sample {

  /**
    * Program entry point: builds a SparkContext with master "local", runs the
    * sampling job, and stops the context when finished.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // The app name matches this object so the job is identifiable in the
    // Spark UI and logs.
    val conf: SparkConf = new SparkConf()
      .setAppName("Demo6Sample")
      .setMaster("local")

    val sc = new SparkContext(conf)

    // Stop the context even when the job fails, so its resources are released.
    try {
      val studentRDD: RDD[String] = sc.textFile("data/students.txt")

      /**
        * sample: draws a sample of the RDD; the sampling is not exact, so the
        * number of returned elements only approximates the requested fraction.
        * withReplacement: whether an element may be drawn more than once
        * fraction: the sampling ratio
        */
      val sample: RDD[String] =
        studentRDD.sample(withReplacement = false, fraction = 0.1)

      sample.foreach(println)
    } finally {
      sc.stop()
    }
  }

}
